/*    Copyright 2012 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.subcommands;

import java.util.Arrays;
import java.util.List;

import mosdi.util.Alphabet;
import mosdi.util.Log;
import mosdi.util.NamedSequence;
import mosdi.util.SequenceUtils;

/**
 * Subcommand {@code length-histogram}: reads a FASTA file and prints, for
 * every sequence length in the considered range, the number of sequences
 * having that length (plus an optional pseudo count).
 */
public class LengthHistogramSubcommand extends Subcommand {
	
	@Override
	public String usage() {
		return
		super.usage()+" [options] <fasta-file>\n" +
		"\n" +
		"Options:\n" +
		"  -p <pseudocounts>: add pseudo counts to each length count (default: 0.0). May be\n" +
		"                     any floating point number.\n" +
		"  -m <min-length>: Ignore sequences below this length (default: 1)\n" +
		"  -M <max-length>: Ignore sequences above this length (default: unlimited)";
	}
	
	@Override
	public String description() {
		return "Computes a length histogram for a given FASTA file.";
	}

	@Override
	public String name() {
		return "length-histogram";
	}

	/**
	 * Formats a count for output: values that are exactly representable as a
	 * {@code long} are printed without a fractional part, everything else
	 * (fractional values, NaN, infinities, out-of-range magnitudes) is printed
	 * via {@link Double#toString(double)}.
	 *
	 * @param d value to format
	 * @return textual representation of {@code d}
	 */
	private static String printDouble(double d) {
		// Long.MAX_VALUE is not exactly representable as a double; it is promoted
		// to 2^63, which does not fit into a long. The upper bound therefore has
		// to be strict, otherwise 2^63 would be printed as Long.MAX_VALUE.
		boolean isIntLike = (d<Long.MAX_VALUE) && (d>=Long.MIN_VALUE) && (d==(double)((long)d));
		if (isIntLike) {
			return Long.toString((long)d);
		} else {
			return Double.toString(d);
		}
	}
	
	/**
	 * Reads the FASTA file named by the first positional argument and prints
	 * one line per length with a non-zero (pseudo-)count, in ascending order of
	 * length. Nothing is printed when no length lies in the considered range
	 * (no sequences were read, or the {@code -m}/{@code -M} limits exclude all
	 * observed lengths).
	 *
	 * @param args command line arguments of this subcommand
	 * @return 0 on success; if the input file cannot be read, the error is
	 *         logged and the JVM is terminated with exit status 1
	 */
	@Override
	public int run(String[] args) {
		parseOptions(args, 1, "p:m:M:");

		// Option dependencies
		// -- none --

		// Mandatory arguments
		String filename = getStringArgument(0);

		// Options (-1 is used below as the "option not given" marker for -m and -M)
		double pseudoCount = getDoubleOption("p", 0.0);
		int minLengthParameter = getPositiveIntOption("m", -1);
		int maxLengthParameter = getPositiveIntOption("M", -1);
		

		Alphabet alphabet = new Alphabet();
		List<NamedSequence> sequences = null;
		try {
			sequences = SequenceUtils.readFastaFile(filename, alphabet, true);
		} catch (Exception e) {
			Log.errorln(e.toString());
			System.exit(1);
			// Makes sure the null value of "sequences" is never used below.
			return 1;
		}
		
		// Range of lengths actually observed in the input ...
		int minLength = Integer.MAX_VALUE;
		int maxLength = 0;
		for (NamedSequence s : sequences) {
			minLength = Math.min(minLength, s.length());
			maxLength = Math.max(maxLength, s.length());
		}
		// ... narrowed by the user supplied limits, if any.
		if (minLengthParameter != -1) minLength = Math.max(minLength, minLengthParameter);
		if (maxLengthParameter != -1) maxLength = Math.min(maxLength, maxLengthParameter);
		
		// Empty range: either no sequences were read (minLength is still
		// Integer.MAX_VALUE) or the limits exclude every observed length.
		// Allocating the histogram would fail with a negative array size.
		if (minLength > maxLength) {
			return 0;
		}
		
		// histogram[i] holds the count for length (i + minLength)
		double[] histogram = new double[maxLength-minLength+1];
		Arrays.fill(histogram, pseudoCount);
		for (NamedSequence s : sequences) {
			if ((s.length() < minLength) || (s.length() > maxLength)) continue;
			histogram[s.length() - minLength] += 1;
		}
		for (int i=0; i<histogram.length; ++i) {
			// Lengths without any (pseudo) count are omitted from the output.
			if (histogram[i] == 0.0) continue;
			Log.printf(Log.Level.STANDARD, "%d \t%s%n", i+minLength, printDouble(histogram[i]));
		}
		return 0;
	}
}
